Project: Deep Learning with Convolutional Neural Networks (classifying images of Zalando fashion articles)

Problem:¶

Classify images of Zalando clothing articles: 'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'.
Multiclass classification of images with deep learning and Convolutional Neural Networks

Tools:¶

Tensorflow Sequential models
Keras Conv2D, BatchNormalization, Dropout and MaxPool2D layers
sklearn kfold validation and predictions

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import myML_functions as myML_functions

import tensorflow as tf
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, cross_validate, cross_val_predict
from sklearn.model_selection import KFold

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import make_scorer, f1_score, recall_score, precision_score, accuracy_score

print('defaults loaded')

defaults loaded

Using TensorFlow backend.

1 - Load data¶

# Human-readable class names, indexed by the integer label found in column 0
# of the CSV files.
labels = [
    'T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'boot',
]

# One row per sample: column 0 holds the label, the remaining columns the pixels.
train_data = pd.read_csv('./data/fashion-mnist_train.csv')
test_data = pd.read_csv('./data/fashion-mnist_test.csv')

# Turn the flat pixel rows into (samples, 28, 28, 1) image tensors and
# one-hot encode the integer labels.
train_features = np.array(train_data.iloc[:, 1:]).reshape(len(train_data), 28, 28, 1)
train_target = to_categorical(train_data.iloc[:, 0])

test_features = np.array(test_data.iloc[:, 1:]).reshape(len(test_data), 28, 28, 1)
test_target = to_categorical(test_data.iloc[:, 0])

img_rows, img_cols = train_features.shape[1:3]
print('reading done')

reading done

2 - Build Model¶

When metrics=['accuracy'] is passed, Keras automatically picks one of binary accuracy, categorical accuracy or sparse categorical accuracy, depending on the loss function and the shape of the targets.

import keras.backend as K

# Limit this process to a fraction of the GPU memory. The session has to be
# registered with Keras; otherwise Keras lazily creates its own default
# session and the memory limit configured here is never applied.
gpu_fraction = 0.5
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(sess)

# Three strided convolutions (200 -> 100 -> 30 filters), each followed by
# batch normalisation. strides=2 does the spatial down-sampling, so no
# pooling layers are used. Only the first layer needs input_shape.
model = Sequential()
model.add(Conv2D(200, kernel_size=3, activation='relu', input_shape=(28, 28, 1),
                 padding='same', strides=2, dilation_rate=1))
#model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
#model.add(MaxPool2D(2))

model.add(Conv2D(100, kernel_size=3, activation='relu',
                 padding='same', strides=2, dilation_rate=1))
#model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
#model.add(MaxPool2D(2))

model.add(Conv2D(30, kernel_size=3, activation='relu',
                 padding='same', strides=2, dilation_rate=1))
#model.add(Dropout(rate=0.3))
model.add(BatchNormalization())
#model.add(MaxPool2D(2))

# Classification head: one softmax unit per clothing class.
model.add(Flatten())
model.add(Dense(10, activation='softmax'))

# Keep only the weights of the epoch with the lowest validation loss; they
# are written to 'cnn_weights.hdf5'.
checkpoint = ModelCheckpoint('cnn_weights.hdf5', monitor='val_loss', save_best_only=True)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
callbacks_list = [checkpoint]
# validation_split=0.2 holds out 20% of the training data for validation.
model.fit(train_features, train_target, validation_split=0.2, epochs=15, batch_size=1000, verbose=2,
          callbacks=callbacks_list)

Train on 48000 samples, validate on 12000 samples
Epoch 1/15
 - 4s - loss: 0.5924 - acc: 0.7941 - val_loss: 0.4016 - val_acc: 0.8613
Epoch 2/15
 - 3s - loss: 0.3411 - acc: 0.8790 - val_loss: 0.3658 - val_acc: 0.8701
Epoch 3/15
 - 3s - loss: 0.2817 - acc: 0.9000 - val_loss: 0.3417 - val_acc: 0.8762
Epoch 4/15
 - 3s - loss: 0.2450 - acc: 0.9126 - val_loss: 0.3092 - val_acc: 0.8884
Epoch 5/15
 - 3s - loss: 0.2106 - acc: 0.9274 - val_loss: 0.3128 - val_acc: 0.8901
Epoch 6/15
 - 3s - loss: 0.1880 - acc: 0.9330 - val_loss: 0.2915 - val_acc: 0.8982
Epoch 7/15
 - 3s - loss: 0.1675 - acc: 0.9434 - val_loss: 0.2901 - val_acc: 0.8972
Epoch 8/15
 - 3s - loss: 0.1443 - acc: 0.9518 - val_loss: 0.3160 - val_acc: 0.8900
Epoch 9/15
 - 3s - loss: 0.1345 - acc: 0.9549 - val_loss: 0.3047 - val_acc: 0.8978
Epoch 10/15
 - 3s - loss: 0.1144 - acc: 0.9627 - val_loss: 0.2961 - val_acc: 0.9035
Epoch 11/15
 - 3s - loss: 0.1024 - acc: 0.9665 - val_loss: 0.3042 - val_acc: 0.8988
Epoch 12/15
 - 3s - loss: 0.0858 - acc: 0.9725 - val_loss: 0.3067 - val_acc: 0.9047
Epoch 13/15
 - 3s - loss: 0.0737 - acc: 0.9779 - val_loss: 0.3371 - val_acc: 0.8954
Epoch 14/15
 - 3s - loss: 0.0635 - acc: 0.9809 - val_loss: 0.3434 - val_acc: 0.8953
Epoch 15/15
 - 3s - loss: 0.0569 - acc: 0.9833 - val_loss: 0.3520 - val_acc: 0.8955

<keras.callbacks.History at 0x7f324c7b5630>

3 - Evaluate¶

# Restore the checkpointed weights before scoring on the held-out test set.
model.load_weights('cnn_weights.hdf5')
score = model.evaluate(test_features, test_target, batch_size=10)
print(f"Evaluation test accuracy: {score[1]:0.2f}")

# Predicted class indices versus the integer-decoded one-hot test targets.
predictions = model.predict_classes(test_features)
test_decoded = test_target.argmax(axis=1)

# Aggregate scores; precision/recall/F1 are averaged weighted by class support.
weighted = {'average': 'weighted'}
accuracy = accuracy_score(test_decoded, predictions)
precision = precision_score(test_decoded, predictions, **weighted)
recall = recall_score(test_decoded, predictions, **weighted)
f1 = f1_score(test_decoded, predictions, **weighted)
print(f"accuracy: {accuracy:0.3f}, precision: {precision:0.3f}, recall: {recall:0.3f}, f1: {f1:0.3f}\n")

# Per-class report followed by the confusion matrix (rows: true, columns: predicted).
print(classification_report(test_decoded, predictions, target_names=labels))
class_ids = list(range(len(labels)))
cm = confusion_matrix(test_decoded, predictions, labels=class_ids)
myML_functions.print_cm(cm, labels=labels)

10000/10000 [==============================] - 2s 249us/step
Evaluation test accuracy: 0.90
accuracy: 0.904, precision: 0.904, recall: 0.904, f1: 0.904

              precision    recall  f1-score   support

     T-shirt       0.84      0.88      0.86      1000
     Trouser       0.99      0.98      0.98      1000
    Pullover       0.87      0.83      0.85      1000
       Dress       0.92      0.91      0.91      1000
        Coat       0.85      0.86      0.85      1000
      Sandal       0.96      0.95      0.96      1000
       Shirt       0.75      0.75      0.75      1000
     Sneaker       0.93      0.96      0.94      1000
         Bag       0.97      0.97      0.97      1000
        boot       0.96      0.94      0.95      1000

   micro avg       0.90      0.90      0.90     10000
   macro avg       0.90      0.90      0.90     10000
weighted avg       0.90      0.90      0.90     10000

   true / pred  T-shirt  Trouser Pullover    Dress     Coat   Sandal    Shirt  Sneaker      Bag     boot 
       T-shirt    879.0      0.0     15.0     12.0      4.0      3.0     77.0      0.0     10.0      0.0 
       Trouser      4.0    981.0      1.0      5.0      3.0      0.0      3.0      0.0      3.0      0.0 
      Pullover     20.0      0.0    826.0     16.0     68.0      1.0     67.0      0.0      2.0      0.0 
         Dress     20.0      8.0      3.0    911.0     29.0      0.0     27.0      0.0      2.0      0.0 
          Coat      1.0      2.0     51.0     21.0    858.0      0.0     65.0      0.0      2.0      0.0 
        Sandal      1.0      1.0      0.0      0.0      0.0    954.0      0.0     29.0      3.0     12.0 
         Shirt    118.0      3.0     50.0     27.0     47.0      0.0    749.0      0.0      6.0      0.0 
       Sneaker      0.0      0.0      0.0      0.0      0.0     16.0      0.0    962.0      0.0     22.0 
           Bag      2.0      0.0      4.0      2.0      4.0      6.0      5.0      4.0    972.0      1.0 
          boot      0.0      0.0      0.0      0.0      0.0      9.0      1.0     44.0      1.0    945.0

# Show the first 16 test images in a 2 x 8 grid, each titled with the class
# the model predicted for it.
# plt.figure is used instead of plt.subplots so that no extra full-figure
# axes is created underneath the grid cells.
fig = plt.figure(figsize=(16, 5))
grid = gridspec.GridSpec(2, 8)
grid.update(wspace=0.3)

for idx in range(16):
    subplot = fig.add_subplot(grid[idx])
    subplot.imshow(test_features[idx, :, :, 0])
    subplot.set_title(labels[predictions[idx]])

# plt.show must be called; the bare name only references the function.
plt.show()

<function matplotlib.pyplot.show>

4 - Validation with SKlearn¶

4.1 - Cross Validation with Kfold¶

def baseline_model():
    """Build and compile the small CNN used for scikit-learn cross-validation.

    The network is two strided 3x3 convolutions with 10 filters each, every
    one followed by batch normalisation, then a 10-way softmax classifier.
    It is compiled with the Adam optimiser, categorical cross-entropy loss
    and the accuracy metric.

    Returns:
        A freshly constructed, compiled (untrained) ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(10, kernel_size=3, activation='relu', input_shape=(28, 28, 1),
                     padding='same', strides=2, dilation_rate=1))
    #model.add(Dropout(0.1))
    model.add(BatchNormalization())
    #model.add(MaxPool2D(2))

    model.add(Conv2D(10, kernel_size=3, activation='relu',
                     padding='same', strides=2, dilation_rate=1))
    #model.add(Dropout(0.1))
    model.add(BatchNormalization())
    #model.add(MaxPool2D(2))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))

    # No ModelCheckpoint is created here: a callback built inside the factory
    # is never passed to fit(), so it would have no effect.
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# 2-fold cross-validation of the baseline CNN through the scikit-learn wrapper.
seed = 1
estimator = KerasClassifier(
    build_fn=baseline_model,
    epochs=1,
    batch_size=5,
    verbose=True,
    validation_split=0.2,
)
kfold = KFold(n_splits=2, shuffle=True, random_state=seed)
results = cross_val_score(estimator, train_features, train_target, cv=kfold)

# Report the mean and standard deviation of the per-fold scores as percentages.
mean_pct = results.mean() * 100
std_pct = results.std() * 100
print("Baseline: %.2f%% (%.2f%%)" % (mean_pct, std_pct))

Train on 24000 samples, validate on 6000 samples
Epoch 1/1
24000/24000 [==============================] - 45s 2ms/step - loss: 0.5349 - acc: 0.8087 - val_loss: 0.4376 - val_acc: 0.8422
30000/30000 [==============================] - 22s 745us/step
Train on 24000 samples, validate on 6000 samples
Epoch 1/1
24000/24000 [==============================] - 47s 2ms/step - loss: 0.5237 - acc: 0.8168 - val_loss: 0.4179 - val_acc: 0.8523
30000/30000 [==============================] - 30s 991us/step
Baseline: 85.14% (0.33%)

4.2 - cross_val_predict and sklearn metrics¶

# Collect one prediction per training sample from 2-fold cross-validation,
# using the same shuffling seed as the scoring run.
kfold = KFold(n_splits=2, shuffle=True, random_state=seed)
predictions = cross_val_predict(
    estimator,
    X=train_features,
    y=train_target,
    cv=kfold,
)

Train on 24000 samples, validate on 6000 samples
Epoch 1/1
24000/24000 [==============================] - 53s 2ms/step - loss: 0.5316 - acc: 0.8100 - val_loss: 0.4546 - val_acc: 0.8360
30000/30000 [==============================] - 34s 1ms/step
Train on 24000 samples, validate on 6000 samples
Epoch 1/1
24000/24000 [==============================] - 52s 2ms/step - loss: 0.5286 - acc: 0.8107 - val_loss: 0.4359 - val_acc: 0.8417
30000/30000 [==============================] - 21s 691us/step

# Score the cross-validated predictions against the integer-decoded one-hot
# training targets; precision/recall/F1 are averaged weighted by class support.
target_decoded = train_target.argmax(axis=1)
weighted = {'average': 'weighted'}
accuracy = accuracy_score(target_decoded, predictions)
precision = precision_score(target_decoded, predictions, **weighted)
recall = recall_score(target_decoded, predictions, **weighted)
f1 = f1_score(target_decoded, predictions, **weighted)
print(f"accuracy: {accuracy:0.3f}, precision: {precision:0.3f}, recall: {recall:0.3f}, f1: {f1:0.3f}")

accuracy: 0.844, precision: 0.848, recall: 0.844, f1: 0.845

5 - Inspect Kernels¶

# In the trained Sequential model each Conv2D layer is followed by a
# BatchNormalization layer, so the three convolutions are layers 0, 2 and 4.
conv1 = model.layers[0]
weights1 = conv1.get_weights()
conv2 = model.layers[2]
weights2 = conv2.get_weights()
conv3 = model.layers[4]
weights3 = conv3.get_weights()


# The first element of get_weights() is the array of kernel weights.
kernel1 = weights1[0]
kernel2 = weights2[0]
kernel3 = weights3[0]
print("kernel size, kernel size, Number of channels (colours), number of kernels in this layer")
print(kernel1.shape)

# Pull out the first input channel of the first kernel in each layer.
kernel1_1 = kernel1[:, :, 0, 0]
kernel2_1 = kernel2[:, :, 0, 0]
kernel3_1 = kernel3[:, :, 0, 0]
print(kernel1_1.shape)

# Give every kernel its own axes: consecutive plt.imshow calls draw onto the
# same axes, which would leave only the last image visible.
fig, axes = plt.subplots(1, 2, figsize=(6, 3))
axes[0].imshow(kernel1_1)
axes[0].set_title('layer 1 kernel')
axes[1].imshow(kernel2_1)
axes[1].set_title('layer 2 kernel')
plt.show()

kernel size, kernel size, Number of channels (colours), number of kernels in this layer
(3, 3, 1, 100)
(3, 3)

<matplotlib.image.AxesImage at 0x1400730b8>

def convolution(im, kernel):
    """Slide ``kernel`` over ``im`` and return the map of window responses.

    Each output value is the sum of the element-wise product between the
    kernel and the image window whose top-left corner is at that position
    (the kernel is not flipped).

    Args:
        im: 2-D array holding the image.
        kernel: 2-D array holding the filter weights.

    Returns:
        A float array with the same shape as ``im``. Positions where the
        kernel would not fit entirely inside the image (the bottom and right
        borders) are left at 0. If the kernel is larger than the image the
        result is all zeros.
    """
    k_rows, k_cols = kernel.shape[0], kernel.shape[1]
    result = np.zeros((im.shape[0], im.shape[1]))
    # "+ 1" so the last window that still fits inside the image is included;
    # without it the final valid row and column are skipped.
    for ii in range(im.shape[0] - k_rows + 1):
        for jj in range(im.shape[1] - k_cols + 1):
            window = im[ii:ii + k_rows, jj:jj + k_cols]
            result[ii, jj] = (window * kernel).sum()
    return result

# Apply the extracted third-layer kernel to the first test image and show the
# input next to the filtered result. Separate axes are used because two
# plt.imshow calls on the same axes would hide the original image.
test_image = test_features[0, :, :, 0]
filtered_image = convolution(test_image, kernel3_1)

fig, (ax_original, ax_filtered) = plt.subplots(1, 2, figsize=(8, 4))
ax_original.imshow(test_image)
ax_original.set_title('original')
ax_filtered.imshow(filtered_image)
ax_filtered.set_title('filtered')
plt.show()

<matplotlib.image.AxesImage at 0x1402b44a8>

6 - Custom metrics¶

import keras.backend as K

def c1_c2_c3(y_true, y_pred):
    """Return the three counts shared by the precision, recall and F1 metrics.

    Each count is the sum of its input after clipping to [0, 1] and rounding:

    * c1 -- from ``y_true * y_pred`` (positives that were predicted positive)
    * c2 -- from ``y_pred`` (everything predicted positive)
    * c3 -- from ``y_true`` (everything actually positive)
    """
    def _rounded_sum(values):
        # Clip to [0, 1], round to 0/1, then count the ones.
        return K.sum(K.round(K.clip(values, 0, 1)))

    true_positives = _rounded_sum(y_true * y_pred)
    predicted_positives = _rounded_sum(y_pred)
    actual_positives = _rounded_sum(y_true)

    return (true_positives, predicted_positives, actual_positives)

def f1_score(y_true, y_pred):
    """Keras metric: F1 score (harmonic mean of precision and recall).

    NOTE: this name shadows ``sklearn.metrics.f1_score`` imported at the top
    of the file; once this definition has run, code that calls ``f1_score``
    with label arrays gets this backend version instead.

    Args:
        y_true: tensor of ground-truth targets.
        y_pred: tensor of predictions, same shape as ``y_true``.

    Returns:
        A scalar tensor. It is 0 when there are no true positives, including
        the cases with no positive targets or no positive predictions.
    """
    (c1, c2, c3) = c1_c2_c3(y_true, y_pred)

    # The counts are symbolic tensors, so a Python ``if c3 == 0`` cannot
    # branch on their value. Adding epsilon to each denominator keeps the
    # divisions finite and makes the score 0 whenever c1 is 0.
    eps = K.epsilon()

    # How many selected items are relevant?
    prec = c1 / (c2 + eps)

    # How many relevant items are selected?
    rec = c1 / (c3 + eps)

    return 2 * (prec * rec) / (prec + rec + eps)

def precision(y_true, y_pred):
    """Keras metric: fraction of predicted positives that are true positives.

    Args:
        y_true: tensor of ground-truth targets.
        y_pred: tensor of predictions, same shape as ``y_true``.

    Returns:
        A scalar tensor; 0 when nothing is predicted positive.
    """
    (c1, c2, c3) = c1_c2_c3(y_true, y_pred)

    # Epsilon prevents a 0/0 = NaN result when there are no positive
    # predictions in the batch.
    return c1 / (c2 + K.epsilon())

def recall(y_true, y_pred):
    """Keras metric: fraction of actual positives that were predicted positive.

    Args:
        y_true: tensor of ground-truth targets.
        y_pred: tensor of predictions, same shape as ``y_true``.

    Returns:
        A scalar tensor; 0 when there are no positive targets.
    """
    (c1, c2, c3) = c1_c2_c3(y_true, y_pred)

    # c3 is a symbolic tensor, so a Python ``if c3 == 0`` cannot branch on
    # its value. With epsilon in the denominator, recall is 0 when there are
    # no true samples (c1 is then 0 as well) and never NaN.
    return c1 / (c3 + K.epsilon())

def my_metric(y_true, y_pred):
    """Pairwise ordering score between predictions and targets.

    Over all pairs whose true values are strictly ordered, this returns the
    share of pairs whose predictions are ordered the same way, counting tied
    predictions as half a pair.

    NOTE(review): the broadcasting looks written for 1-D ``y_true`` /
    ``y_pred`` (one value per sample) -- confirm against the caller.

    Returns:
        A scalar float tensor; 0.0 when the weighted pair count is zero.
    """
    # Pairwise differences between predictions.
    g = tf.subtract(tf.expand_dims(y_pred, -1), y_pred)
    # 1 where the difference is positive, 0.5 where the predictions tie.
    # tf.equal compares element-wise; a plain ``g == 0.0`` on a TF1 tensor is
    # an object comparison that is always False, which dropped the tie term.
    g = tf.cast(tf.equal(g, 0.0), tf.float32) * 0.5 + tf.cast(g > 0.0, tf.float32)

    # Mask of pairs whose true values are strictly ordered, restricted to the
    # lower triangle (diagonal included) of the pair matrix.
    f = tf.subtract(tf.expand_dims(y_true, -1), y_true) > 0.0
    f = tf.matrix_band_part(tf.cast(f, tf.float32), -1, 0)

    # Weighted count of correctly ordered pairs, and number of pairs considered.
    g = tf.reduce_sum(tf.multiply(g, f))
    f = tf.reduce_sum(f)

    # Return 0.0 instead of dividing when the numerator is zero (this also
    # covers the case where no pairs were considered).
    return tf.where(tf.equal(g, 0), 0.0, g/f) #select